The material in this workshop is strongly influenced by the following free online materials:
#assign function
assign('w', 0)
w
## [1] 0
#left arrow
x <- 1
x
## [1] 1
#equal
y = 2
y
## [1] 2
#right arrow
3 -> z
z
## [1] 3
w + x + y + z
## [1] 6
Vectors are single dimension data collections of the same type.
#use `c` function to combine individual datum together in a vector
#character
char_vector <- c('a', 'b', 'c', 'd')
char_vector
## [1] "a" "b" "c" "d"
#for numbers, you can get a vector range by using `:`
#integer
int_vector <- 1:10
int_vector
## [1] 1 2 3 4 5 6 7 8 9 10
#combining int and char vectors returns a character
combo_int_char <- c(char_vector, int_vector)
typeof(combo_int_char)
## [1] "character"
combo_int_char
## [1] "a" "b" "c" "d" "1" "2" "3" "4" "5" "6" "7" "8" "9" "10"
#double
dbl_vector <- seq(1, 2, by=.2)
dbl_vector
## [1] 1.0 1.2 1.4 1.6 1.8 2.0
#logical/boolean
logical_vector <- c(FALSE, TRUE, F, T)
logical_vector
## [1] FALSE TRUE FALSE TRUE
Items can be extracted from vector using square bracket [index] notation. R is a 1 index language and the first item in most data structures is in position 1.
#first element of char_vector
char_vector[1]
## [1] "a"
#last element of int_vector (calculate the size of vector with `length()`)
int_vector[length(int_vector)]
## [1] 10
Matrices are two dimensional data collections of the same type.
matrix(1:10)
## [,1]
## [1,] 1
## [2,] 2
## [3,] 3
## [4,] 4
## [5,] 5
## [6,] 6
## [7,] 7
## [8,] 8
## [9,] 9
## [10,] 10
matrix(1:10, ncol = 2)
## [,1] [,2]
## [1,] 1 6
## [2,] 2 7
## [3,] 3 8
## [4,] 4 9
## [5,] 5 10
myMatrix <- matrix(1:10, ncol = 2, byrow = T)
myMatrix
## [,1] [,2]
## [1,] 1 2
## [2,] 3 4
## [3,] 5 6
## [4,] 7 8
## [5,] 9 10
matrix(sample(0:1, size = 100, replace = T), ncol = 10, nrow = 10)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 0 0 0 0 1 0 1 1 0 0
## [2,] 0 1 0 1 1 1 1 0 0 0
## [3,] 0 0 0 0 0 1 1 0 0 1
## [4,] 0 0 1 1 1 1 1 1 0 1
## [5,] 0 0 0 0 1 1 0 0 1 1
## [6,] 0 1 1 0 0 1 0 0 1 1
## [7,] 1 1 0 1 0 0 1 0 0 1
## [8,] 1 1 0 0 0 0 1 1 1 1
## [9,] 0 1 1 0 0 1 1 0 1 0
## [10,] 1 0 0 1 1 0 0 1 0 0
Items can be retrieved from a matrix by using the square bracket [row, column] notation
#first row
myMatrix[1,]
## [1] 1 2
#first column
myMatrix[,1]
## [1] 1 3 5 7 9
#first row first column
myMatrix[1,1]
## [1] 1
Lists are the R objects which contain elements of different types like − numbers, strings, vectors and another list inside it. A list can also contain a matrix or a function as its elements.
myList <- list(
char_vector,
int_vector,
dbl_vector,
logical_vector,
myMatrix
)
myList
## [[1]]
## [1] "a" "b" "c" "d"
##
## [[2]]
## [1] 1 2 3 4 5 6 7 8 9 10
##
## [[3]]
## [1] 1.0 1.2 1.4 1.6 1.8 2.0
##
## [[4]]
## [1] FALSE TRUE FALSE TRUE
##
## [[5]]
## [,1] [,2]
## [1,] 1 2
## [2,] 3 4
## [3,] 5 6
## [4,] 7 8
## [5,] 9 10
Items can be retrieved from a list by using double bracket notation [[index]].
myList[[1]]
## [1] "a" "b" "c" "d"
The indexes in a list can also be named. If they are named, then you can use dollar sign notation $name to retrieve an item from the list.
myNamedList <- list(
"char" = char_vector,
"int" = int_vector,
"dbl" = dbl_vector,
"lgl" = logical_vector,
"mat" = myMatrix,
"list" = myList
)
myNamedList$char
## [1] "a" "b" "c" "d"
A data frame is used for storing data tables. It is a list of vectors of equal length. It should resemble a typical table structure similar to an Excel worksheet.
myDF <- data.frame(
char = c('a', 'b', 'c', 'd', 'e', 'f'),
num = 1:6,
lgl = rep(c(T, F), 3)
)
myDF
## char num lgl
## 1 a 1 TRUE
## 2 b 2 FALSE
## 3 c 3 TRUE
## 4 d 4 FALSE
## 5 e 5 TRUE
## 6 f 6 FALSE
myDF[1:3,'num']
## [1] 1 2 3
myDF$num[1:3]
## [1] 1 2 3
myDF[1:3, c('num', 'lgl')]
## num lgl
## 1 1 TRUE
## 2 2 FALSE
## 3 3 TRUE
To explore functions from the tidyverse we will create two dataframes.
state_df <- data.frame(
NAME = state.name,
ABB = state.abb,
AREA = state.area
)
head(state_df)
## NAME ABB AREA
## 1 Alabama AL 51609
## 2 Alaska AK 589757
## 3 Arizona AZ 113909
## 4 Arkansas AR 53104
## 5 California CA 158693
## 6 Colorado CO 104247
state_regions <- data.frame(
NAME = c(state.name, 'District of Columbia'),
REGION = c(as.character(state.region), 'South')
)
head(state_regions)
## NAME REGION
## 1 Alabama South
## 2 Alaska West
## 3 Arizona West
## 4 Arkansas South
## 5 California West
## 6 Colorado West
%>%Of the Tidyverse packages, the only one we will explicitly load into our environment is magrittr
library(magrittr)
state_regions %>%
dplyr::filter(REGION == 'Northeast')
## NAME REGION
## 1 Connecticut Northeast
## 2 Maine Northeast
## 3 Massachusetts Northeast
## 4 New Hampshire Northeast
## 5 New Jersey Northeast
## 6 New York Northeast
## 7 Pennsylvania Northeast
## 8 Rhode Island Northeast
## 9 Vermont Northeast
state_df %>%
dplyr::filter(AREA >= 150000)
## NAME ABB AREA
## 1 Alaska AK 589757
## 2 California CA 158693
## 3 Texas TX 267339
state_df %>%
dplyr::filter(ABB %in% c("PA", "OH", "MD"))
## NAME ABB AREA
## 1 Maryland MD 10577
## 2 Ohio OH 41222
## 3 Pennsylvania PA 45333
stringr::str_detect(string = 'New York', pattern = 'New')
## [1] TRUE
state_df %>%
dplyr::filter(stringr::str_detect(NAME, 'New'))
## NAME ABB AREA
## 1 New Hampshire NH 9304
## 2 New Jersey NJ 7836
## 3 New Mexico NM 121666
## 4 New York NY 49576
state_df %>%
dplyr::left_join(state_regions, by = 'NAME') %>%
tail()
## Warning: Column `NAME` joining factors with different levels, coercing to
## character vector
## NAME ABB AREA REGION
## 45 Vermont VT 9609 Northeast
## 46 Virginia VA 40815 South
## 47 Washington WA 68192 West
## 48 West Virginia WV 24181 South
## 49 Wisconsin WI 56154 North Central
## 50 Wyoming WY 97914 West
state_df %>%
dplyr::full_join(state_regions, by = 'NAME') %>%
tail()
## Warning: Column `NAME` joining factors with different levels, coercing to
## character vector
## NAME ABB AREA REGION
## 46 Virginia VA 40815 South
## 47 Washington WA 68192 West
## 48 West Virginia WV 24181 South
## 49 Wisconsin WI 56154 North Central
## 50 Wyoming WY 97914 West
## 51 District of Columbia <NA> NA South
state_regions %>%
dplyr::anti_join(state_df, by = 'NAME')
## Warning: Column `NAME` joining factors with different levels, coercing to
## character vector
## NAME REGION
## 1 District of Columbia South
iterates through the rows of a table.
state_df$AREA %>%
head() %>%
purrr::map(sqrt)
## [[1]]
## [1] 227.1761
##
## [[2]]
## [1] 767.9564
##
## [[3]]
## [1] 337.5041
##
## [[4]]
## [1] 230.4431
##
## [[5]]
## [1] 398.3629
##
## [[6]]
## [1] 322.873
state_df$AREA %>%
head() %>%
purrr::map_dbl(sqrt)
## [1] 227.1761 767.9564 337.5041 230.4431 398.3629 322.8730
state_df %>%
head() %>%
{
purrr::map2_chr(.$NAME, .$ABB, function(x, y){
paste(x, 'is', y)
})
}
## [1] "Alabama is AL" "Alaska is AK" "Arizona is AZ"
## [4] "Arkansas is AR" "California is CA" "Colorado is CO"
state_df %>%
head() %>%
{
purrr::pmap_chr(., function(NAME, ABB, AREA){
paste(NAME, 'or', ABB, 'has an area of', AREA )
})
}
## [1] "Alabama or AL has an area of 51609"
## [2] "Alaska or AK has an area of 589757"
## [3] "Arizona or AZ has an area of 113909"
## [4] "Arkansas or AR has an area of 53104"
## [5] "California or CA has an area of 158693"
## [6] "Colorado or CO has an area of 104247"
nested_regions <- state_regions %>%
dplyr::group_by(REGION) %>%
tidyr::nest()
nested_regions
## # A tibble: 4 x 2
## REGION data
## <fct> <list>
## 1 South <tibble [17 × 1]>
## 2 West <tibble [13 × 1]>
## 3 Northeast <tibble [9 × 1]>
## 4 North Central <tibble [12 × 1]>
new_regions <- nested_regions %>%
dplyr::mutate(n_new_in_region = purrr::map_dbl(data, function(d){
d %>%
dplyr::filter(stringr::str_detect(NAME, 'New')) %>%
nrow()
}))
new_regions
## # A tibble: 4 x 3
## REGION data n_new_in_region
## <fct> <list> <dbl>
## 1 South <tibble [17 × 1]> 0
## 2 West <tibble [13 × 1]> 1
## 3 Northeast <tibble [9 × 1]> 3
## 4 North Central <tibble [12 × 1]> 0
new_regions %>%
tidyr::unnest() %>%
dplyr::arrange(desc(n_new_in_region))
## # A tibble: 51 x 3
## REGION n_new_in_region NAME
## <fct> <dbl> <fct>
## 1 Northeast 3 Connecticut
## 2 Northeast 3 Maine
## 3 Northeast 3 Massachusetts
## 4 Northeast 3 New Hampshire
## 5 Northeast 3 New Jersey
## 6 Northeast 3 New York
## 7 Northeast 3 Pennsylvania
## 8 Northeast 3 Rhode Island
## 9 Northeast 3 Vermont
## 10 West 1 Alaska
## # … with 41 more rows
stringr::str_replace(string = 'Philadelphia Metro Area', pattern = ' Metro Area', replacement = '')
## [1] "Philadelphia"
library(sf)
library(spData)
point_mat <- matrix(c(1, 3, 5, 1, 4, 1), ncol = 2)
point_mat
## [,1] [,2]
## [1,] 1 1
## [2,] 3 4
## [3,] 5 1
multipoint = st_multipoint(point_mat)
multipoint
## MULTIPOINT (1 1, 3 4, 5 1)
plot(multipoint, cex = 2.5)
linestring = st_cast(multipoint, "LINESTRING")
linestring
## LINESTRING (1 1, 3 4, 5 1)
plot(linestring)
polyg = st_cast(multipoint, "POLYGON")
polyg
## POLYGON ((1 1, 3 4, 5 1, 1 1))
plot(polyg, col = 'lightgrey', border = 'purple', lwd = 4)
st_cast(polyg, 'MULTIPOINT')
## MULTIPOINT (1 1, 3 4, 5 1)
hole_points <- list(
matrix(c(1,1, 3,4, 5,1, 1,1), ncol = 2, byrow = T),
matrix(c(2,2, 3,3, 4,2, 2,2), ncol = 2, byrow = T),
matrix(c(2.25,2.25, 3,2.75, 3.75,2.25, 2.25,2.25), ncol=2, byrow = T),
matrix(c(1,3, 1,4, 2,3.5, 1,3), ncol = 2, byrow =T),
matrix(c(3,3, 5,4, 5,2, 3,3), ncol = 2, byrow =T)
)
holy_polygon <- st_polygon(hole_points)
holy_polygon
## POLYGON ((1 1, 3 4, 5 1, 1 1), (2 2, 3 3, 4 2, 2 2), (2.25 2.25, 3 2.75, 3.75 2.25, 2.25 2.25), (1 3, 1 4, 2 3.5, 1 3), (3 3, 5 4, 5 2, 3 3))
plot(holy_polygon, col = 'lightgrey')
multi_poly <- st_cast(holy_polygon, 'MULTIPOLYGON')
plot(multi_poly, col = 'lightgrey')
# create a polygon
a_poly = st_polygon(list(rbind(c(-1, -1), c(1, -1), c(1, 1), c(-1, -1))))
a = st_sfc(a_poly)
# create a line
l_line = st_linestring(x = matrix(c(-1, 1, 1, -1), ncol = 2))
l = st_sfc(l_line)
# create points
p_matrix = matrix(c(0.5, 1, -1, 0, 0, 1, 1, 1), ncol = 2)
p_multi = st_multipoint(x = p_matrix)
p = st_sf(st_cast(st_sfc(p_multi), "POINT"))
plot(a)
plot(p, add = T, pch = 19)
plot(l, add = T)
p_within <- p %>%
dplyr::filter(st_within(., a, sparse = F))
plot(a)
plot(p_within, add = T, pch = 19)
p_touches <- p %>%
dplyr::filter(st_touches(., a, sparse = F))
plot(a)
plot(p_touches, add = T, pch = 19)
p_intersects <- p %>%
dplyr::filter(st_intersects(., a, sparse = F))
plot(a)
plot(p_intersects, add = T, pch = 19)
p_disjoint <- p %>%
dplyr::filter(st_disjoint(., a, sparse = F))
plot(a)
plot(p_disjoint, add = T, pch = 19)
p_withindist <- p %>%
dplyr::filter(st_is_within_distance(., a, dist = 1, sparse = F))
plot(a)
plot(p_withindist, add = T, pch = 19)
p_close_but_not_inside <- p %>%
dplyr::filter(st_is_within_distance(., a, dist = 1, sparse = F),
st_disjoint(., a, sparse = F))
plot(a)
plot(p_close_but_not_inside, add = T, pch = 19)
line_poly <- st_union(l, a)
p_touches <- p %>%
dplyr::filter(st_touches(., line_poly, sparse = F))
line_poly %>%
st_cast %>%
plot()
plot(p_touches, add = T)
us_states <- st_read('data/cb_2017_us_state_500k/cb_2017_us_state_500k.shp')
## Reading layer `cb_2017_us_state_500k' from data source `/Users/ben/GEODEV2019/data/cb_2017_us_state_500k/cb_2017_us_state_500k.shp' using driver `ESRI Shapefile'
## Simple feature collection with 56 features and 9 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -179.1489 ymin: -14.5487 xmax: 179.7785 ymax: 71.36516
## epsg (SRID): 4269
## proj4string: +proj=longlat +datum=NAD83 +no_defs
head(us_states)
## Simple feature collection with 6 features and 9 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -117.243 ymin: 36.9703 xmax: -71.46455 ymax: 49.00115
## epsg (SRID): 4269
## proj4string: +proj=longlat +datum=NAD83 +no_defs
## STATEFP STATENS AFFGEOID GEOID STUSPS NAME LSAD
## 1 54 01779805 0400000US54 54 WV West Virginia 00
## 2 17 01779784 0400000US17 17 IL Illinois 00
## 3 24 01714934 0400000US24 24 MD Maryland 00
## 4 16 01779783 0400000US16 16 ID Idaho 00
## 5 50 01779802 0400000US50 50 VT Vermont 00
## 6 09 01779780 0400000US09 09 CT Connecticut 00
## ALAND AWATER geometry
## 1 62265662566 489840834 MULTIPOLYGON (((-82.6432 38...
## 2 143784114293 6211277447 MULTIPOLYGON (((-91.51297 4...
## 3 25150696145 6980371026 MULTIPOLYGON (((-76.05015 3...
## 4 214048160737 2393355752 MULTIPOLYGON (((-117.2427 4...
## 5 23873457570 1031134839 MULTIPOLYGON (((-73.43774 4...
## 6 12542619303 1815495323 MULTIPOLYGON (((-72.76143 4...
plot(us_states$geometry)
conus <- us_states %>%
dplyr::filter(!NAME %in% c("Alaska",
"American Samoa",
"Commonwealth of the Northern Mariana Islands",
"Hawaii",
"Guam",
"Puerto Rico",
"United States Virgin Islands"
))
plot(conus$geometry)
conus_outline <- st_union(conus$geometry)
conus_outline
## Geometry set for 1 feature
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7631 ymin: 24.5231 xmax: -66.9499 ymax: 49.38436
## epsg (SRID): 4269
## proj4string: +proj=longlat +datum=NAD83 +no_defs
## MULTIPOLYGON (((-82.00977 24.53491, -82.00931 2...
plot(conus_outline)
amtrak <- st_read('data/amtrak_rails/amtrak_rails.shp')
## Reading layer `amtrak_rails' from data source `/Users/ben/GEODEV2019/data/amtrak_rails/amtrak_rails.shp' using driver `ESRI Shapefile'
## Simple feature collection with 46 features and 4 fields
## geometry type: MULTILINESTRING
## dimension: XY
## bbox: xmin: -123.2017 ymin: 25.7818 xmax: -69.96816 ymax: 49.24535
## epsg (SRID): 4269
## proj4string: +proj=longlat +datum=NAD83 +no_defs
amtrak
## Simple feature collection with 46 features and 4 fields
## geometry type: MULTILINESTRING
## dimension: XY
## bbox: xmin: -123.2017 ymin: 25.7818 xmax: -69.96816 ymax: 49.24535
## epsg (SRID): 4269
## proj4string: +proj=longlat +datum=NAD83 +no_defs
## First 10 features:
## OBJECTID NAME Shape_Leng ShapeSTLen
## 1 1 Acela 741010.5 977920.7
## 2 2 Adirondack 615675.4 843749.7
## 3 3 Auto Train 1474023.7 1776111.7
## 4 4 Blue Water 511438.4 693922.3
## 5 5 California Zephyr 4313889.5 5672841.5
## 6 6 Capitol Limited 1287416.4 1703069.5
## 7 7 Cardinal 1865049.5 2409211.1
## 8 8 Carolinian/Piedmont 1166213.4 1473517.0
## 9 9 Cascades 755629.4 1105895.4
## 10 10 Chicago - St.Louis 454576.7 595026.6
## geometry
## 1 MULTILINESTRING ((-77.01421...
## 2 MULTILINESTRING ((-73.74197...
## 3 MULTILINESTRING ((-81.3177 ...
## 4 MULTILINESTRING ((-87.6361 ...
## 5 MULTILINESTRING ((-108.5559...
## 6 MULTILINESTRING ((-77.00406...
## 7 MULTILINESTRING ((-80.87038...
## 8 MULTILINESTRING ((-80.82749...
## 9 MULTILINESTRING ((-123.0638...
## 10 MULTILINESTRING ((-90.20919...
plot(amtrak$geometry)
plot(conus_outline)
plot(amtrak$geometry, add = T)
state_regions <- data.frame(
NAME = c(state.name, "District of Columbia"),
REGION = c(as.character(state.region), "South")
)
head(state_regions)
## NAME REGION
## 1 Alabama South
## 2 Alaska West
## 3 Arizona West
## 4 Arkansas South
## 5 California West
## 6 Colorado West
conus_w_region <- conus %>%
dplyr::left_join(state_regions)
## Joining, by = "NAME"
## Warning: Column `NAME` joining factors with different levels, coercing to
## character vector
head(conus_w_region)
## Simple feature collection with 6 features and 10 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -117.243 ymin: 36.9703 xmax: -71.46455 ymax: 49.00115
## epsg (SRID): 4269
## proj4string: +proj=longlat +datum=NAD83 +no_defs
## STATEFP STATENS AFFGEOID GEOID STUSPS NAME LSAD
## 1 54 01779805 0400000US54 54 WV West Virginia 00
## 2 17 01779784 0400000US17 17 IL Illinois 00
## 3 24 01714934 0400000US24 24 MD Maryland 00
## 4 16 01779783 0400000US16 16 ID Idaho 00
## 5 50 01779802 0400000US50 50 VT Vermont 00
## 6 09 01779780 0400000US09 09 CT Connecticut 00
## ALAND AWATER REGION geometry
## 1 62265662566 489840834 South MULTIPOLYGON (((-82.6432 38...
## 2 143784114293 6211277447 North Central MULTIPOLYGON (((-91.51297 4...
## 3 25150696145 6980371026 South MULTIPOLYGON (((-76.05015 3...
## 4 214048160737 2393355752 West MULTIPOLYGON (((-117.2427 4...
## 5 23873457570 1031134839 Northeast MULTIPOLYGON (((-73.43774 4...
## 6 12542619303 1815495323 Northeast MULTIPOLYGON (((-72.76143 4...
cbsa <- read_sf('data/cb_2017_us_cbsa_500k/cb_2017_us_cbsa_500k.shp')
head(cbsa)
## Simple feature collection with 6 features and 8 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -98.95394 ymin: 32.84464 xmax: -79.4987 ymax: 43.8495
## epsg (SRID): 4269
## proj4string: +proj=longlat +datum=NAD83 +no_defs
## # A tibble: 6 x 9
## CSAFP CBSAFP AFFGEOID GEOID NAME LSAD ALAND AWATER
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 <NA> 43620 310M300… 43620 Siou… M1 6.67e 9 2.79e7
## 2 357 12660 310M300… 12660 Bara… M2 2.15e 9 4.57e7
## 3 <NA> 40220 310M300… 40220 Roan… M1 4.84e 9 7.25e7
## 4 <NA> 48660 310M300… 48660 Wich… M1 6.78e 9 1.44e8
## 5 290 22840 310M300… 22840 Fort… M2 2.01e 9 4.12e6
## 6 122 12060 310M300… 12060 Atla… M1 2.25e10 3.93e8
## # … with 1 more variable: geometry <MULTIPOLYGON [°]>
plot(cbsa$geometry)
conus_cbsa <- cbsa %>%
# dplyr::mutate(STUSPS = stringr::str_extract(NAME, '(?<=, )[^,]+$')) %>%
dplyr::filter(st_intersects(., conus_outline, sparse = F))
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
plot(conus_cbsa$geometry)
amtrak_cbsa <- conus_cbsa %>%
dplyr::filter(st_intersects(., amtrak, sparse = T) %>%
purrr::map_lgl(function(x){
!identical(x, integer(0))
}))
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
plot(conus_outline)
plot(amtrak$geometry, add = T)
plot(amtrak_cbsa$geometry, add = T)
conus_regions <- conus_w_region %>%
dplyr::group_by(REGION) %>%
tidyr::nest() %>%
{
purrr::map2(.$data, .$REGION, function(d, r){
d %>%
st_union() %>% #unions all features in subregion
st_sf() %>% #converts to sf (data frame type object)
dplyr::mutate(REGION = r) #remember the name of the region
}
)} %>%
do.call(rbind, .)
conus_regions
## Simple feature collection with 4 features and 1 field
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7631 ymin: 24.5231 xmax: -66.9499 ymax: 49.38436
## epsg (SRID): 4269
## proj4string: +proj=longlat +datum=NAD83 +no_defs
## REGION geometry
## 1 South MULTIPOLYGON (((-82.00977 2...
## 2 North Central MULTIPOLYGON (((-83.48237 4...
## 3 West MULTIPOLYGON (((-118.6105 3...
## 4 Northeast MULTIPOLYGON (((-70.62678 4...
plot(conus_regions$geometry)
northeast <- conus_regions %>%
dplyr::filter(REGION == 'Northeast')
northeast_amtrak <- amtrak %>%
dplyr::filter(st_intersects(., northeast, sparse = F))
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
northeast_amtrak_cbsa <- amtrak_cbsa %>%
dplyr::filter(st_intersects(., northeast, sparse = F))
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
plot(northeast$geometry)
plot(northeast_amtrak_cbsa$geometry, col = 'lightgrey', add = T)
plot(northeast_amtrak$geometry, col = 'red', lwd = 1, add = T)
census_data <- readr::read_csv('data/PEP_2017.csv')
## Parsed with column specification:
## cols(
## NAME = col_character(),
## GEOID = col_double(),
## respop72017 = col_double()
## )
census_data
## # A tibble: 393 x 3
## NAME GEOID respop72017
## <chr> <dbl> <dbl>
## 1 United States NA 325719178
## 2 In metropolitan statistical area NA 279698020
## 3 Abilene, TX Metro Area 10180 170219
## 4 Akron, OH Metro Area 10420 703505
## 5 Albany, GA Metro Area 10500 151434
## 6 Albany, OR Metro Area 10540 125047
## 7 Albany-Schenectady-Troy, NY Metro Area 10580 886188
## 8 Albuquerque, NM Metro Area 10740 910726
## 9 Alexandria, LA Metro Area 10780 153984
## 10 Allentown-Bethlehem-Easton, PA-NJ Metro Area 10900 840550
## # … with 383 more rows
census_data <- census_data %>%
dplyr::mutate(NAME = stringr::str_replace(NAME, ' Metro Area$', ''),
GEOID = as.character(GEOID))
amtrak_cbsa <- amtrak_cbsa %>%
dplyr::left_join(census_data)
## Joining, by = c("GEOID", "NAME")
northeast_amtrak_cbsa <- northeast_amtrak_cbsa %>%
dplyr::left_join(census_data)
## Joining, by = c("GEOID", "NAME")
plot(northeast$geometry)
plot(northeast_amtrak_cbsa['respop72017'], add = T)
plot(northeast_amtrak$geometry, col = 'red', lwd = 1, add = T)
amtrak_points <- st_cast(northeast_amtrak, 'POINT')
## Warning in st_cast.sf(northeast_amtrak, "POINT"): repeating attributes for
## all sub-geometries for which they may not be constant
amtrak_lines <- st_cast(northeast_amtrak, 'LINESTRING')
## Warning in st_cast.sf(northeast_amtrak, "LINESTRING"): repeating attributes
## for all sub-geometries for which they may not be constant
plot(amtrak_points$geometry)
amtrak_cbsa_points <- northeast_amtrak_cbsa %>%
dplyr::select(NAME, respop72017) %>%
st_join(amtrak_points, .)
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
amtrak_cbsa_lines <- northeast_amtrak_cbsa %>%
dplyr::select(NAME, respop72017) %>%
st_join(amtrak_lines, ., largest = T)
## although coordinates are longitude/latitude, st_intersection assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant
## throughout all geometries
amtrak_cbsa_points
## Simple feature collection with 267797 features and 6 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: -90.11523 ymin: 25.84845 xmax: -69.96816 ymax: 45.50603
## epsg (SRID): 4269
## proj4string: +proj=longlat +datum=NAD83 +no_defs
## First 10 features:
## OBJECTID NAME.x Shape_Leng ShapeSTLen
## 1 1 Acela 741010.5 977920.7
## 1.1 1 Acela 741010.5 977920.7
## 1.2 1 Acela 741010.5 977920.7
## 1.3 1 Acela 741010.5 977920.7
## 1.4 1 Acela 741010.5 977920.7
## 1.5 1 Acela 741010.5 977920.7
## 1.6 1 Acela 741010.5 977920.7
## 1.7 1 Acela 741010.5 977920.7
## 1.8 1 Acela 741010.5 977920.7
## 1.9 1 Acela 741010.5 977920.7
## NAME.y respop72017
## 1 Washington-Arlington-Alexandria, DC-VA-MD-WV 6216589
## 1.1 Washington-Arlington-Alexandria, DC-VA-MD-WV 6216589
## 1.2 Washington-Arlington-Alexandria, DC-VA-MD-WV 6216589
## 1.3 Washington-Arlington-Alexandria, DC-VA-MD-WV 6216589
## 1.4 Washington-Arlington-Alexandria, DC-VA-MD-WV 6216589
## 1.5 Washington-Arlington-Alexandria, DC-VA-MD-WV 6216589
## 1.6 Washington-Arlington-Alexandria, DC-VA-MD-WV 6216589
## 1.7 Washington-Arlington-Alexandria, DC-VA-MD-WV 6216589
## 1.8 Washington-Arlington-Alexandria, DC-VA-MD-WV 6216589
## 1.9 Washington-Arlington-Alexandria, DC-VA-MD-WV 6216589
## geometry
## 1 POINT (-77.01421 38.8836)
## 1.1 POINT (-77.01371 38.88349)
## 1.2 POINT (-77.01356 38.88345)
## 1.3 POINT (-77.01334 38.88342)
## 1.4 POINT (-77.01308 38.88339)
## 1.5 POINT (-77.01293 38.88339)
## 1.6 POINT (-77.01269 38.88338)
## 1.7 POINT (-77.01266 38.88338)
## 1.8 POINT (-77.01227 38.88341)
## 1.9 POINT (-77.01205 38.88343)
amtrak_cbsa_lines
## Simple feature collection with 322 features and 6 fields
## geometry type: LINESTRING
## dimension: XY
## bbox: xmin: -90.11523 ymin: 25.84845 xmax: -69.96816 ymax: 45.50603
## epsg (SRID): 4269
## proj4string: +proj=longlat +datum=NAD83 +no_defs
## First 10 features:
## OBJECTID NAME.x Shape_Leng ShapeSTLen
## 1 1 Acela 741010.5 977920.7
## 1.1 1 Acela 741010.5 977920.7
## 1.2 1 Acela 741010.5 977920.7
## 1.3 1 Acela 741010.5 977920.7
## 1.4 1 Acela 741010.5 977920.7
## 1.5 1 Acela 741010.5 977920.7
## 1.6 1 Acela 741010.5 977920.7
## 1.7 1 Acela 741010.5 977920.7
## 1.8 1 Acela 741010.5 977920.7
## 1.9 1 Acela 741010.5 977920.7
## NAME.y respop72017
## 1 Philadelphia-Camden-Wilmington, PA-NJ-DE-MD 6096120
## 1.1 New York-Newark-Jersey City, NY-NJ-PA 20320876
## 1.2 New York-Newark-Jersey City, NY-NJ-PA 20320876
## 1.3 New York-Newark-Jersey City, NY-NJ-PA 20320876
## 1.4 New York-Newark-Jersey City, NY-NJ-PA 20320876
## 1.5 New York-Newark-Jersey City, NY-NJ-PA 20320876
## 1.6 New York-Newark-Jersey City, NY-NJ-PA 20320876
## 1.7 New York-Newark-Jersey City, NY-NJ-PA 20320876
## 1.8 New York-Newark-Jersey City, NY-NJ-PA 20320876
## 1.9 New York-Newark-Jersey City, NY-NJ-PA 20320876
## geometry
## 1 LINESTRING (-77.01421 38.88...
## 1.1 LINESTRING (-74.1708 40.727...
## 1.2 LINESTRING (-74.1708 40.727...
## 1.3 LINESTRING (-74.15611 40.73...
## 1.4 LINESTRING (-74.11723 40.74...
## 1.5 LINESTRING (-74.115 40.7466...
## 1.6 LINESTRING (-74.115 40.7466...
## 1.7 LINESTRING (-73.93893 40.74...
## 1.8 LINESTRING (-73.91594 40.75...
## 1.9 LINESTRING (-73.91594 40.75...
plot(amtrak_cbsa_lines['respop72017'])
plot(northeast$geometry, add = T)
amtrak_cbsa_points <- amtrak_cbsa_points %>%
dplyr::select(rail = NAME.x,
city = NAME.y,
pop2017 = respop72017) %>%
dplyr::mutate(rail = as.character(rail),
city = as.character(city))
plot(northeast$geometry, col = 'grey90')
plot(northeast_amtrak$geometry, col = 'darkslategrey', lwd = 1, add = T)
plot(amtrak_cbsa_points['pop2017'], cex = .25, add = T)
library(leaflet)
center_conus <- st_centroid(conus_outline) %>%
st_coordinates()
## Warning in st_centroid.sfc(conus_outline): st_centroid does not give
## correct centroids for longitude/latitude data
print('')
## [1] ""
center_conus
## X Y
## 1 -99.39245 39.39516
m <- leaflet() %>%
setView(lng = center_conus[1],
lat = center_conus[2],
zoom = 3) %>%
addTiles() # Add default OpenStreetMap map tiles
m
m <- m %>%
addPolygons(data = conus_regions,
color = 'black',
weight = 1,
fillColor = 'white',
fillOpacity = .1)
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'
m
pal <- colorNumeric(
palette = c("#edf8b1", "#2c7fb8"),
domain = amtrak_cbsa$respop72017)
m <- m %>%
addPolygons(data = amtrak_cbsa,
fillColor = ~pal(respop72017),
fillOpacity = .8,
color = 'black',
weight = 1,
label = ~paste(NAME, "'s Expected Population for 2017 was", respop72017)
) %>%
addLegend("bottomleft",
data = amtrak_cbsa,
pal = pal,
values = ~respop72017,
title = "Est. Population (2017)",
opacity = 1
)
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'
m
m %>%
addPolylines(data = amtrak,
color = 'red',
weight = 3)
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'